Dieses Notebook bereitet die Daten für die Intelligent Zoning Engine vor. Es speichert

Bereits in anderen scripten wurde vorbereitet:

Die Daten werden wiefolgt vorbereitet:

TODO: - die sozioökonomischen Faktoren werden aus den Wahlbezirken auf die Blöcke hochgerechnet (https://github.com/berlinermorgenpost/cogran)

Laden der Daten

sampled_buildings = read_rds('output/sampled_buildings.rds')
bez = readOGR('download/RBS_OD_BEZ_2015_12.geojson', layer = 'OGRGeoJSON', stringsAsFactors = FALSE)
OGR data source with driver: GeoJSON 
Source: "download/RBS_OD_BEZ_2015_12.geojson", layer: "OGRGeoJSON"
with 13 features
It has 2 fields
blk = readOGR('download/RBS_OD_BLK_2015_12.geojson', layer = 'OGRGeoJSON', stringsAsFactors = FALSE)
OGR data source with driver: GeoJSON 
Source: "download/RBS_OD_BLK_2015_12.geojson", layer: "OGRGeoJSON"
with 15720 features
It has 4 fields
lor = readOGR('download/RBS_OD_LOR_2015_12.geojson', layer = 'OGRGeoJSON', stringsAsFactors = FALSE)
OGR data source with driver: GeoJSON 
Source: "download/RBS_OD_LOR_2015_12.geojson", layer: "OGRGeoJSON"
with 447 features
It has 8 fields
re_schulstand = readOGR('download/re_schulstand.geojson', layer = 'OGRGeoJSON', stringsAsFactors = FALSE)
OGR data source with driver: GeoJSON 
Source: "download/re_schulstand.geojson", layer: "OGRGeoJSON"
with 709 features
It has 20 fields

Schulwege

route_matrix = read_rds('output/route_matrix.rds')

Schulkapazitäten und Einwohnerzahler auf LOR-Ebene

kapas = read_csv('download/anmeldezahlen.csv') %>% filter(grepl('G', Schulnummer)) %>% filter(!is.na(`Plätze`))
Parsed with column specification:
cols(
  Bezirk = col_character(),
  Schulnummer = col_character(),
  Schulname = col_character(),
  Plätze = col_integer(),
  Anmeldungen = col_character()
)
einwohner_lor = read_delim('download/EWR201512E_Matrix.csv', delim=';')
Parsed with column specification:
cols(
  .default = col_character(),
  ZEIT = col_integer(),
  STADTRAUM = col_integer(),
  E_E = col_number(),
  E_EM = col_number(),
  E_EW = col_number(),
  E_E50_55 = col_number(),
  E_E25U55 = col_number(),
  E_E55U65 = col_number(),
  E_E65U80 = col_number()
)
See spec(...) for full column specifications.

Überprüfung der Vollständigkeit der Daten über Anmeldezahlen/Kapazitäten

re_schulstand_df = re_schulstand %>% as.data.frame() %>% rename(lon=coords.x1, lat=coords.x2)
re_schulstand_df %>% filter(grepl('G', spatial_name)) %>% mutate(BEZIRK=enc2utf8(as.character(BEZIRK))) %>%
  group_by(BEZIRK) %>% summarise(`Anzahl Schulen` = n()) %>%
  rename(Bezirk=BEZIRK) %>% left_join(kapas %>% group_by(Bezirk) %>% summarise(`Mit Kapazität` = n()))
Joining, by = "Bezirk"

Für welche Bezirke haben wir für alle Schulen Kapazitäten gegeben?

re_schulstand %>% as.data.frame() %>% filter(grepl('G', spatial_name)) %>% mutate(BEZIRK=enc2utf8(as.character(BEZIRK))) %>% group_by(BEZIRK) %>% summarise(`Anzahl Schulen` = n()) %>%
  rename(Bezirk=BEZIRK) %>% left_join(kapas %>% group_by(Bezirk) %>% summarise(`Mit Kapazität` = n())) %>% filter(`Anzahl Schulen` == `Mit Kapazität`)
Joining, by = "Bezirk"

Überprüfung ob die Liste der Schulen und Liste der Schulen mit Kapazitätsinformationen gleich sind:

bezirk = 'Tempelhof-Schöneberg'
schulen_mit_kapa = kapas %>% filter(Bezirk == bezirk) %>% .$Schulnummer
schulen_mit_kapa
 [1] "07G01" "07G02" "07G03" "07G05" "07G06" "07G07" "07G10" "07G12"
 [9] "07G13" "07G14" "07G15" "07G16" "07G17" "07G18" "07G19" "07G20"
[17] "07G21" "07G22" "07G23" "07G24" "07G25" "07G26" "07G27" "07G28"
[25] "07G29" "07G30" "07G31" "07G32" "07G34" "07G35" "07G36" "07G37"
07G01

07G02

07G03

07G05

07G06

07G07

07G10

07G12

07G13

07G14

07G15

07G16

07G17

07G18

07G19

07G20

07G21

07G22

07G23

07G24

07G25

07G26

07G27

07G28

07G29

07G30

07G31

07G32

07G34

07G35

07G36

07G37
grundschulen = re_schulstand %>% as.data.frame() %>% filter(grepl('G', spatial_name)) %>% filter(BEZIRK == bezirk) %>% .$spatial_name
'In Anmeldeliste, fehlt in Schulstand'
[1] "In Anmeldeliste, fehlt in Schulstand"
In Anmeldeliste, fehlt in Schulstand
setdiff(schulen_mit_kapa, grundschulen)
character(0)
'In re_schulstand, fehlt in Anmeldeliste'
[1] "In re_schulstand, fehlt in Anmeldeliste"
In re_schulstand, fehlt in Anmeldeliste
setdiff(grundschulen, schulen_mit_kapa)
character(0)
map = get_map('Berlin')
Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=Berlin&zoom=10&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Berlin&sensor=false
re_schulstand_df_w_kapas = re_schulstand_df %>% left_join(kapas, by=c('spatial_name'='Schulnummer'))

Plot aller Schulen, mit der Info, ob Kapazitätsinformationen verfügbar sind.

data = re_schulstand_df_w_kapas %>% filter(grepl('G', spatial_name)) %>% filter(BEZIRK==bezirk) %>% mutate(missing.capa=is.na(`Plätze`))
ggmap(map) + geom_point(aes(lon, lat, color=missing.capa), data=data) +
    coord_map(xlim=c(min(data$lon)-0.01, max(data$lon)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01))

Filter auf Schulen mit Kapazitätsinformationen (für T-S sind das alle):

relevant_schools = re_schulstand_df_w_kapas %>% filter(grepl('G', spatial_name)) %>% filter(BEZIRK==bezirk & !is.na(`Plätze`)) %>% .$spatial_name
relevant_schools
 [1] "07G01" "07G02" "07G03" "07G05" "07G06" "07G07" "07G10" "07G12"
 [9] "07G13" "07G14" "07G15" "07G16" "07G17" "07G18" "07G19" "07G20"
[17] "07G21" "07G22" "07G23" "07G24" "07G25" "07G26" "07G27" "07G28"
[25] "07G29" "07G30" "07G31" "07G32" "07G34" "07G35" "07G36" "07G37"
07G01

07G02

07G03

07G05

07G06

07G07

07G10

07G12

07G13

07G14

07G15

07G16

07G17

07G18

07G19

07G20

07G21

07G22

07G23

07G24

07G25

07G26

07G27

07G28

07G29

07G30

07G31

07G32

07G34

07G35

07G36

07G37

Mapping Bezirk->LOR->Block

df_bez = as.data.frame(bez)
df_lor = as.data.frame(lor)
df_blk = as.data.frame(blk)

Sanity-Check: LORs und Blöcke im Bezirk

bez_id = filter(df_bez, BEZNAME == bezirk)$BEZ
relevant_lors = df_lor %>% filter(BEZ == bez_id)
relevant_blks = df_blk %>% filter(BEZ == bez_id)
ggplot() + geom_path(aes(x=long, y=lat, group=group), data=lor[lor$BEZ == bez_id,]) + coord_map() + geom_path(aes(x=long, y=lat, group=group), data=bez, color='red')
Regions defined for each Polygons
Regions defined for each Polygons

Blöcke im Bezirk

ggplot() + geom_path(aes(x=long, y=lat, group=group), data=blk[blk$BEZ == bez_id,]) + coord_map() + geom_path(aes(x=long, y=lat, group=group), data=bez[bez$BEZ == bez_id,], color='red')
Regions defined for each Polygons
Regions defined for each Polygons

Kinder im Bezirk auf Blöcke hochrechnen

Über die Einwohnerinformationen in RBS_OD_BLK_2015_12.geojson kann EWR201512E_Matrix.csv von LOR-Ebene auf Blockebene hochgerechnet werden.

TODO: Stattdessen mit https://github.com/berlinermorgenpost/cogran machen?

Plot der 6-Jährigen nach EWR201512E_Matrix.csv

Wir verwenden das mittel der 5- und 6-Jährigen.

TODO neue Daten von Torres? TODO Prognose?

relevant_ewr = einwohner_lor %>%
  select(RAUMID, E_E05_06, E_E06_07) %>%
  filter(RAUMID %in% relevant_lors$PLR) %>%
  # Schnitt der 5 und 6-Jährigen
  mutate(kids=(as.numeric(gsub(',','.',E_E06_07))+as.numeric(gsub(',','.',E_E05_06)))/2) %>% as.data.frame()

data = tidy(lor[lor$BEZ == bez_id,], region='PLR') %>% inner_join(relevant_ewr, by=c('id'='RAUMID'))
ggmap(map) + geom_polygon(aes(x=long, y=lat, group=group, fill=kids), data=data) +
  coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01))

Plot der Einwohner auf Blockebene

data = tidy(blk[blk$BEZ == bez_id,], region='BLK') %>% inner_join(df_blk, by=c('id'='BLK')) %>% mutate(Einw=ifelse(Einw==0, NA, Einw))
0
[1] 0
ggmap(map) + geom_polygon(aes(x=long, y=lat, group=group, fill=Einw), data=data) +
  coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01))

Hochrrechnung auf Blöcke, Strukturquote

Strukturquote = 0.9

kids_in_blks = relevant_blks %>%
  group_by(PLR) %>%
  mutate(EinwRatio = Einw/sum(Einw)) %>%
  ungroup %>%
  left_join(relevant_ewr, by=c('PLR'='RAUMID')) %>%
  mutate(kids = EinwRatio*kids) %>%
  mutate(kids = Strukturquote*kids) %>% # Strukturquote
  select(BEZ, PLR, BLK, Einw, kids) %>%
  as.data.frame()
row.names(kids_in_blks) = kids_in_blks$BLK

data = tidy(blk[blk$BEZ == bez_id,], region='BLK') %>% inner_join(kids_in_blks, by=c('id'='BLK')) %>% mutate(kids=ifelse(kids==0, NA, kids), Einw=ifelse(Einw==0, NA, Einw))

ggmap(map) + geom_polygon(aes(x=long, y=lat, group=group, fill=kids), data=data) +
  coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01))

Verfügbare Plätze

relevant_kapas = kapas %>% select(Schulnummer, Kapa=`Plätze`) %>% filter(Schulnummer %in% relevant_schools) %>% as.data.frame()
#row.names(relevant_kapas) = relevant_kapas$Schulnummer

Überprüfung der Summe der Kapazitäten, Anmeldungen und Kinderstatistiken

'Summe Kapas'
[1] "Summe Kapas"
Summe Kapas
relevant_kapas %>% .$Kapa %>% sum
[1] 2584
'Anmeldungen'
[1] "Anmeldungen"
Anmeldungen
kapas %>% mutate(Anmeldungen = as.numeric(gsub('[^0-9]', '', Anmeldungen))) %>% filter(Schulnummer %in% relevant_schools) %>% .$Anmeldungen %>% sum
[1] 2752
'Kids laut Statistik'
[1] "Kids laut Statistik"
Kids laut Statistik
kids_in_blks$kids %>% sum
[1] 2620.8
relevant_ewr$kids %>% sum
[1] 2912

Schulwege von Blöcken zu Schulen aggregieren

Für jedes Wohngebäude suchen wir den zugehörigen Block

residential_buildings_blocks = sampled_buildings %>% inner_join(df_blk) %>% filter(BEZ == bez_id)
Joining, by = "BLK"
residential_buildings_blocks
routes_from_blks = residential_buildings_blocks %>%
  left_join(route_matrix %>% filter(dst %in% relevant_schools), by=c('OI'='src'))
head(routes_from_blks)

Plot der relevanten Blöcke (mit Wohngebäuden)

data = tidy(blk[blk$BEZ == bez_id,], region='BLK') %>% inner_join(routes_from_blks %>% group_by(BLK) %>% summarise(n=n()), by=c('id'='BLK'))
ggmap(map) + geom_polygon(aes(x=long, y=lat, group=group), fill='red', data=data) +
  #geom_point(aes(x=lon, y=lat), data=rb_df, color='black', size=0.01) +
  coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01))

travel_from_blks = routes_from_blks %>% as.data.frame() %>% group_by(BLK, dst) %>% summarise(min=min(distance), avg=mean(distance), med=median(distance), max=max(distance)) %>% ungroup
travel_from_blks

Plot der Blöcke mit Färbung nach durchschnittlichem Weg zur nächsten Schule

data = tidy(blk[blk$BEZ == bez_id,], region='BLK') %>% left_join(travel_from_blks %>% group_by(BLK) %>% top_n(1, -avg), by=c('id'='BLK'))
ggmap(map) + geom_polygon(aes(x=long, y=lat, group=group, fill=-avg), data=data) +
  geom_point(aes(lon, lat), color='red', data = re_schulstand_df %>% filter(BEZIRK==bezirk & SCHULART=='Grundschule')) +
  coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01))

travel_matrix = travel_from_blks %>% select(BLK, dst, avg) %>% spread(dst, avg)
dim(travel_matrix)
[1] 1012   33
travel_matrix

Sozioökonomische Daten

Wir haben Sozioökonomische Daten in den Wahlbezirken. Strategie: - Schneiden der Wahlbezirke mit den Blöcken - Übernahme des Prozentwertes vom Wahlbezirk für jeden (Unter-)block - Zuordnung zu jedem Block und Vereinigung durch Flächen/Wohnhaus-gewichtetes Mittel des Prozentwertes

UWB = readOGR('download/RBS_OD_UWB_AGH2016.geojson', layer = 'OGRGeoJSON', stringsAsFactors = FALSE)
OGR data source with driver: GeoJSON 
Source: "download/RBS_OD_UWB_AGH2016.geojson", layer: "OGRGeoJSON"
with 1779 features
It has 4 fields
UWB$ID = paste0(UWB$BEZ, UWB$UWB)
sozio_UWB = read_excel('download/DL_BE_AH2016_Strukturdaten.xlsx', sheet = 3) %>% select(ID, sgbIIu65=`Einwohner unter 65 in SGB II 2014 Prozent`)
UWB = UWB %>% sp::merge(sozio_UWB, by='ID')

ggplot(broom::tidy(UWB, region='ID') %>% inner_join(UWB %>% as.data.frame, by=c('id'='ID'))) + geom_polygon(aes(x=long, y=lat, group=group, fill=sgbIIu65)) + coord_map()

Check for self intersections

wgs84 = CRS(proj4string(blk))
ea_projection = CRS("+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs")
blk_in_bez = blk[blk$BEZ == bez_id,]
gIsValid(blk_in_bez)
[1] TRUE
blk_in_bez = gBuffer(spTransform(blk_in_bez, ea_projection), byid=T, width = -0.1)
any(xor(gIntersects(blk_in_bez, byid=T, returnDense = T), diag(1, length(blk_in_bez)) == 1))
[1] FALSE
plot(blk_in_bez)

#gIntersects(UWB[UWB$BEZ == '07',], byid=TRUE)
uwb_in_bez = gBuffer(spTransform(UWB[UWB$BEZ == '07',], ea_projection), byid=T, width=-0.1)
gIsValid(uwb_in_bez)
[1] TRUE
any(xor(gIntersects(uwb_in_bez, byid=T, returnDense = T), diag(1, length(uwb_in_bez)) == 1))
[1] FALSE
plot(uwb_in_bez)

intersection = gIntersection(uwb_in_bez, blk_in_bez, byid = T, drop_lower_td = T)

gIsValid(intersection)
[1] TRUE
plot(intersection)

intersection_uwb_data = intersection %>% over(uwb_in_bez)
intersection_blk_data = intersection %>% over(blk_in_bez)
intersection_area = gArea(intersection, byid=T)
intersection_data = cbind(
    intersection_uwb_data %>% select(UWB_ID=ID, sgbIIu65),
    intersection_blk_data %>% select(BLK, BLK_Einw=Einw),
    data.frame(area=intersection_area) # FIXME how else to normalize?
    )
length(intersection)
[1] 1219
nrow(blk_in_bez)
[1] 1201
# did we miss any blocks?
setdiff(blk_in_bez$BLK, intersection_data$BLK)
character(0)

Pro Block mische die SGB-Werte der Unterblöcke gewichtet nach Fläche. FIXME - besser nach Anzahl der Wohnhäuser?

sgbII_blk = intersection_data %>%
  group_by(BLK) %>%
  summarise(sgbIIu65=sum(sgbIIu65*area)/sum(area)/100)
ggplot(broom::tidy(spTransform(blk_in_bez, wgs84), region='BLK') %>% left_join(sgbII_blk, by=c('id'='BLK'))) + geom_polygon(aes(x=long, y=lat, group=group, fill=sgbIIu65)) + coord_map()

Adjazenz von Blöcken

row.names(blk_in_bez) = blk_in_bez$BLK
buffeded_blk_in_bez = gBuffer(blk_in_bez, byid=T, width=40)
adjacency = gIntersects(gBuffer(blk_in_bez, byid=T, width=40), byid = T)
rownames(adjacency) = blk_in_bez$BLK
colnames(adjacency) = blk_in_bez$BLK

adjacency_df = adjacency %>% as.data.frame() %>% mutate(from=rownames(.)) %>%
  gather(to, connected, -from) %>% filter(connected) %>%
  inner_join(spTransform(blk_in_bez, wgs84) %>% coordinates() %>% as.data.frame() %>% rename(from_long=V1, from_lat=V2) %>% mutate(from=rownames(.))) %>%
  inner_join(spTransform(blk_in_bez, wgs84) %>% coordinates() %>% as.data.frame() %>% rename(to_long=V1, to_lat=V2) %>% mutate(to=rownames(.)))
Joining, by = "from"
Joining, by = "to"
ggplot() +
  geom_polygon(aes(x=long, y=lat, group=group), fill='gray', data=broom::tidy(spTransform(blk_in_bez, wgs84), region='BLK')) + 
  geom_segment(aes(x=from_long, y=from_lat, xend=to_long, yend=to_lat), size=0.1, color='black', data=adjacency_df) +
  theme_nothing() + coord_map()
Warning: `panel.margin` is deprecated. Please use `panel.spacing` property
instead

ggsave('figs/adjacency.pdf')
Saving 7 x 5 in image
adjacency_df %>% filter(connected & from != to) %>% select(from, to) %>% write_csv('app/data/adjacency.csv')

Relevante Daten auswählen

optim_kapas = relevant_kapas
optim_kids_in_blks = kids_in_blks %>% filter(kids > 0) %>% inner_join(travel_matrix, by='BLK') %>% select(BLK, kids) %>% mutate(kids=kids)
nrow(optim_kids_in_blks)
[1] 1012
nrow(optim_kapas)
[1] 32
select_schools = as.character(optim_kapas$Schulnummer)
select_blks = as.character(optim_kids_in_blks$BLK)

optim_matrix = inner_join(optim_kids_in_blks, travel_matrix, by='BLK')[select_schools]

dim(optim_matrix)
[1] 1012   32
optim_kapas$Kapa %>% sum
[1] 2584
optim_kids_in_blks$kids %>% sum
[1] 2611.995

Naive (initiale) Zuordnung: Jeder Block zur nächsten Schule

solution = optim_matrix %>% mutate(BLK=optim_kids_in_blks$BLK) %>% gather(school, dist, -BLK) %>% group_by(BLK) %>% top_n(1, -dist) %>% ungroup

optim_matrix %>% t %>% as.data.frame %>% summarise_each(funs(min)) %>% sum()
[1] 789029.5
solines = re_schulstand_df %>% inner_join(solution, by=c('spatial_name'='school')) %>% inner_join(cbind(as.data.frame(coordinates(blk[blk$BEZ == bez_id,])), blk[blk$BEZ == bez_id,]@data['BLK']))
Joining, by = "BLK"
data = tidy(blk[blk$BEZ == bez_id,], region='BLK') %>% left_join(solution, by=c('id'='BLK'))
ggmap(map, darken = c(0.5, 'white')) + geom_polygon(aes(x=long, y=lat, group=group, fill=school), data=data) +
  geom_segment(aes(x=V1,y=V2,xend=lon,yend=lat), data=solines, size=0.3) +
  geom_point(aes(lon, lat), color='black', size=2, data = re_schulstand_df %>% inner_join(solution, by=c('spatial_name'='school'))) +
  geom_point(aes(lon, lat, color=spatial_name), data = re_schulstand_df %>% inner_join(solution, by=c('spatial_name'='school'))) +
  coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01)) +
  guides(color=F, fill=F)

Darstellung der Zuordnung als Tabelle

library(formattable)
solution %>% inner_join(optim_kids_in_blks, by='BLK') %>% inner_join(travel_from_blks, by=c('BLK'='BLK', 'school'='dst')) %>%
  group_by(school) %>% summarise(
    kids=sum(kids),
    num_blocks=n(),
    min_dist=min(min),
    avg_dist=mean((kids*avg)/sum(kids)),
    max_dist=max(max)
  ) %>%
  inner_join(relevant_kapas, by=c('school'='Schulnummer')) %>%
  mutate(
    utilization=kids/Kapa
  ) %>% select(
   Schule=school,
   `Blöcke`=num_blocks,
   Kapazität=Kapa,
   Kinder=kids,
   Auslastung=utilization,
   `Weg (min)`=min_dist,
   `Weg (Ø)`=avg_dist,
   `Weg (max)`=max_dist
  ) %>%
  formattable(
    list(
      Kinder = formatter("span", x ~ digits(x, 2)),
      Auslastung = formatter("span",
        style = x ~ style(color = ifelse(x < 1, "green", "red")),
        x ~ icontext(ifelse(x < 1, "ok", "remove"), percent(x))
      ),
      `Weg (Ø)` = proportion_bar("lightblue"),
      `Weg (min)` = proportion_bar("lightblue"),
      `Weg (max)` = proportion_bar("lightblue")
    )
  )
Schule Blöcke Kapazität Kinder Auslastung Weg (min) Weg (Ø) Weg (max)
07G01 15 66 74.18 112.39% 255.739151 632.0744 1396.8593
07G02 20 98 47.99 48.97% 230.477219 621.3752 1145.7729
07G03 16 73 60.89 83.41% 4.655441 439.9467 1180.3876
07G05 19 78 96.19 123.33% 107.536011 571.3078 1070.8099
07G06 26 55 71.27 129.58% 262.891296 795.8270 1395.3971
07G07 14 81 33.88 41.82% 132.709625 729.0923 1595.5302
07G10 29 112 130.44 116.46% 9.067926 621.7213 1521.6644
07G12 12 75 32.39 43.19% 19.516815 342.8940 670.5930
07G13 26 82 126.35 154.09% 36.279259 458.8285 1112.2148
07G14 32 78 80.91 103.73% 80.919022 418.8679 768.5562
07G15 48 104 202.25 194.48% 123.392151 818.0813 1615.6770
07G16 24 104 63.87 61.41% 75.267059 518.5303 1081.7244
07G17 24 100 70.80 70.80% 44.943558 362.8416 897.0190
07G18 16 44 48.82 110.95% 63.255936 337.0313 730.1954
07G19 36 112 108.22 96.62% 67.527443 1092.9489 2255.8521
07G20 40 81 139.24 171.90% 143.088776 746.6034 1633.1127
07G21 22 72 58.80 81.67% 84.344872 502.3471 1197.0938
07G22 26 91 60.77 66.78% 80.672195 611.8443 1479.7078
07G23 10 50 23.72 47.44% 70.703979 719.1343 1196.4078
07G24 30 75 88.71 118.28% 130.593018 680.2936 1933.3115
07G25 36 75 124.03 165.37% 41.979919 564.5368 1075.0486
07G26 24 52 30.61 58.86% 43.736134 558.2624 966.7025
07G27 20 75 50.27 67.02% 43.679813 670.4116 1385.6671
07G28 61 75 82.66 110.21% 107.804443 984.4391 2019.0012
07G29 98 104 119.37 114.78% 16.573118 957.5651 2051.8848
07G30 42 72 71.13 98.79% 58.391983 866.9357 2426.0039
07G31 19 78 45.03 57.73% 236.941177 886.2093 1706.3914
07G32 59 78 57.34 73.52% 57.186657 747.9164 1363.8368
07G34 42 100 179.21 179.21% 35.297066 1159.7623 2235.2996
07G35 33 75 87.59 116.79% 85.617516 747.5166 1482.5903
07G36 39 100 60.36 60.36% 137.395325 1021.0702 2236.2500
07G37 54 69 84.71 122.77% 109.733894 1290.1583 2135.0298

ndH und LMB-Daten (nicht offen)

ndh_lmb = read_excel('download/LMB_ndH2015.xlsx') %>% select(
  entity_id=BSN,
  ndH=`Anteil ndH`,
  LMB=`Anteil LMB`
)
ndh_lmb

Daten für die App speichern

Neue Daten

Entities / Schulen

entities = subset(re_schulstand_df, spatial_name %in% relevant_schools) %>%
  rename(entity_id = spatial_name) %>%
  select(-gml_id, -spatial_alias, -spatial_type) %>%
  inner_join(rename(relevant_kapas, capacity=Kapa), by=c('entity_id'='Schulnummer')) %>%
  left_join(ndh_lmb, by=c('entity_id'))
coordinates(entities) = ~ lon + lat
proj4string(entities) = CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
file.remove('app/data/entities.geojson')
[1] TRUE
entities %>% writeOGR('app/data/entities.geojson', layer="entities", driver="GeoJSON", check_exists=F)
entities@data %>% select(entity_id, capacity) %>% write_csv('app/data/entities.csv')

Units / Statistische Blöcke

units = subset(blk, BEZ == bez_id)

units@data$PLR = NULL
units@data$Einw = NULL
units@data$BEZ = NULL
units@data$unit_id = units@data$BLK
units@data$BLK = NULL

units = units %>% sp::merge(
  optim_kids_in_blks %>%
    left_join(sgbII_blk) %>%
    select(unit_id=BLK, population=kids, sgbIIu65)
  )
Joining, by = "BLK"
file.remove('app/data/units.geojson')
[1] TRUE
units %>% writeOGR('app/data/units.geojson', layer="entities", driver="GeoJSON", check_exists=F)
units@data %>% write_csv('app/data/units.csv')

Zuordnung

assignment = solution %>% select(unit_id=BLK, entity_id=school)
assignment %>% write_csv('app/data/assignment.csv')

Weights / Schulwege

travel_from_blks %>%
  rename(unit_id=BLK, entity_id=dst) %>%
  #gather(weight, value, -unit_id, -entity_id) %>%
  write_csv('app/data/weights.csv')

Zusätzliche Daten

file.copy('download/RBS_OD_BEZ_2015_12.geojson', 'app/data/RBS_OD_BEZ_2015_12.geojson')
[1] FALSE
---
title: "R Notebook"
output: html_notebook
---

```{r libs, include=F, warning=F}
library(readr)
library(readxl)
library(rgdal)
library(dplyr)
library(tidyr)
library(ggplot2)
library(ggmap)
library(purrr)
library(knitr)
library(broom)
library(maptools)
library(rgeos)
```

Dieses Notebook bereitet die Daten für die Intelligent Zoning Engine vor. Es speichert

- entities.geojson — Schulen, deren Geokoordinaten und Attribute: entity_id, capacity, und andere Attribute
- entities.csv — Statistische Blöcke Berlins und optimierungsrelevante Attribute: entity_id, capacity
- units.geojson — Statistische Blöcke Berlins, deren Geometrie und Attribute
- units.csv — Statsitische Blöcke Berlins und optimierungsrelevante Attribute: unit_id, population, percentage_sgb
- weights.csv — optimierungsrelevante Gewichte wie Fußwege / Spalten: entity_id, unit_id, weight, value
- assignment.csv — eine initiale Zuordnung / Spalten: unit_id, entity_id

Bereits in anderen scripten wurde vorbereitet:

- Fußwegen von einer großen Sichprobe von (_Wohn_-)Gebäuden zu allen Schulen wurden berechnet und in `route_matrix.csv` gespeichert
- Die Stichprobe wurde in `sampled_buildings.csv` gespeichert

Die Daten werden wiefolgt vorbereitet:

- pro Block werden die Anzahl der einzuschulenden Kinder mit Hilfe der Einwohnerzahlen nach Alter auf LOR-Ebene in `EWR201512E_Matrix.csv` hochgerechnet
    - Kinder des LOR werden Anteilig nach Einwohnerzahl des Blocks im Verhältnis zum LOR auf die Blöcke verteilt
- es werden minimale, durchschnittliche und maximale Fußwege aus jedem Block errechnet

TODO:
- die sozioökonomischen Faktoren werden aus den Wahlbezirken auf die Blöcke hochgerechnet (https://github.com/berlinermorgenpost/cogran)

## Laden der Daten

```{r load data, message=FALSE, warning=FALSE}
sampled_buildings = read_rds('output/sampled_buildings.rds')
bez = readOGR('download/RBS_OD_BEZ_2015_12.geojson', layer = 'OGRGeoJSON', stringsAsFactors = FALSE)
blk = readOGR('download/RBS_OD_BLK_2015_12.geojson', layer = 'OGRGeoJSON', stringsAsFactors = FALSE)
lor = readOGR('download/RBS_OD_LOR_2015_12.geojson', layer = 'OGRGeoJSON', stringsAsFactors = FALSE)
re_schulstand = readOGR('download/re_schulstand.geojson', layer = 'OGRGeoJSON', stringsAsFactors = FALSE)
```

### Schulwege

```{r}
route_matrix = read_rds('output/route_matrix.rds')
```

### Schulkapazitäten und Einwohnerzahler auf LOR-Ebene

```{r}
kapas = read_csv('download/anmeldezahlen.csv') %>% filter(grepl('G', Schulnummer)) %>% filter(!is.na(`Plätze`))
einwohner_lor = read_delim('download/EWR201512E_Matrix.csv', delim=';')
```

## Überprüfung der Vollständigkeit der Daten über Anmeldezahlen/Kapazitäten

```{r}
re_schulstand_df = re_schulstand %>% as.data.frame() %>% rename(lon=coords.x1, lat=coords.x2)
re_schulstand_df %>% filter(grepl('G', spatial_name)) %>% mutate(BEZIRK=enc2utf8(as.character(BEZIRK))) %>%
  group_by(BEZIRK) %>% summarise(`Anzahl Schulen` = n()) %>%
  rename(Bezirk=BEZIRK) %>% left_join(kapas %>% group_by(Bezirk) %>% summarise(`Mit Kapazität` = n()))
```

Für welche Bezirke haben wir für alle Schulen Kapazitäten gegeben?

```{r}
re_schulstand %>% as.data.frame() %>% filter(grepl('G', spatial_name)) %>% mutate(BEZIRK=enc2utf8(as.character(BEZIRK))) %>% group_by(BEZIRK) %>% summarise(`Anzahl Schulen` = n()) %>%
  rename(Bezirk=BEZIRK) %>% left_join(kapas %>% group_by(Bezirk) %>% summarise(`Mit Kapazität` = n())) %>% filter(`Anzahl Schulen` == `Mit Kapazität`)
```

Überprüfung ob die Liste der Schulen und Liste der Schulen mit Kapazitätsinformationen gleich sind:

```{r}
bezirk = 'Tempelhof-Schöneberg'
schulen_mit_kapa = kapas %>% filter(Bezirk == bezirk) %>% .$Schulnummer
schulen_mit_kapa
grundschulen = re_schulstand %>% as.data.frame() %>% filter(grepl('G', spatial_name)) %>% filter(BEZIRK == bezirk) %>% .$spatial_name
'In Anmeldeliste, fehlt in Schulstand'
setdiff(schulen_mit_kapa, grundschulen)
'In re_schulstand, fehlt in Anmeldeliste'
setdiff(grundschulen, schulen_mit_kapa)
```


```{r}
map = get_map('Berlin')
```


```{r}
re_schulstand_df_w_kapas = re_schulstand_df %>% left_join(kapas, by=c('spatial_name'='Schulnummer'))
```

Plot aller Schulen, mit der Info, ob Kapazitätsinformationen verfügbar sind.

```{r}
data = re_schulstand_df_w_kapas %>% filter(grepl('G', spatial_name)) %>% filter(BEZIRK==bezirk) %>% mutate(missing.capa=is.na(`Plätze`))
ggmap(map) + geom_point(aes(lon, lat, color=missing.capa), data=data) +
    coord_map(xlim=c(min(data$lon)-0.01, max(data$lon)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01))
```

Filter auf Schulen mit Kapazitätsinformationen (für T-S sind das alle):

```{r}
relevant_schools = re_schulstand_df_w_kapas %>% filter(grepl('G', spatial_name)) %>% filter(BEZIRK==bezirk & !is.na(`Plätze`)) %>% .$spatial_name
relevant_schools
```

## Mapping Bezirk->LOR->Block

```{r}
df_bez = as.data.frame(bez)
df_lor = as.data.frame(lor)
df_blk = as.data.frame(blk)
```

### Sanity-Check: LORs und Blöcke im Bezirk

```{r}
bez_id = filter(df_bez, BEZNAME == bezirk)$BEZ
relevant_lors = df_lor %>% filter(BEZ == bez_id)
relevant_blks = df_blk %>% filter(BEZ == bez_id)
```

```{r}
ggplot() + geom_path(aes(x=long, y=lat, group=group), data=lor[lor$BEZ == bez_id,]) + coord_map() + geom_path(aes(x=long, y=lat, group=group), data=bez, color='red')
```

### Blöcke im Bezirk

```{r}
ggplot() + geom_path(aes(x=long, y=lat, group=group), data=blk[blk$BEZ == bez_id,]) + coord_map() + geom_path(aes(x=long, y=lat, group=group), data=bez[bez$BEZ == bez_id,], color='red')
```

## Kinder im Bezirk auf Blöcke hochrechnen

Über die Einwohnerinformationen in `RBS_OD_BLK_2015_12.geojson` kann `EWR201512E_Matrix.csv` von LOR-Ebene auf Blockebene hochgerechnet werden.

TODO: Stattdessen mit https://github.com/berlinermorgenpost/cogran machen?

### Plot der 6-Jährigen nach `EWR201512E_Matrix.csv`

Wir verwenden das mittel der 5- und 6-Jährigen.

TODO neue Daten von Torres?
TODO Prognose?

```{r}
relevant_ewr = einwohner_lor %>%
  select(RAUMID, E_E05_06, E_E06_07) %>%
  filter(RAUMID %in% relevant_lors$PLR) %>%
  # Schnitt der 5 und 6-Jährigen
  mutate(kids=(as.numeric(gsub(',','.',E_E06_07))+as.numeric(gsub(',','.',E_E05_06)))/2) %>% as.data.frame()

data = tidy(lor[lor$BEZ == bez_id,], region='PLR') %>% inner_join(relevant_ewr, by=c('id'='RAUMID'))
ggmap(map) + geom_polygon(aes(x=long, y=lat, group=group, fill=kids), data=data) +
  coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01))
```

### Plot der Einwohner auf Blockebene

```{r}
data = tidy(blk[blk$BEZ == bez_id,], region='BLK') %>% inner_join(df_blk, by=c('id'='BLK')) %>% mutate(Einw=ifelse(Einw==0, NA, Einw))
0
ggmap(map) + geom_polygon(aes(x=long, y=lat, group=group, fill=Einw), data=data) +
  coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01))
```


### Hochrrechnung auf Blöcke, Strukturquote

```{r}
Strukturquote = 0.9

kids_in_blks = relevant_blks %>%
  group_by(PLR) %>%
  mutate(EinwRatio = Einw/sum(Einw)) %>%
  ungroup %>%
  left_join(relevant_ewr, by=c('PLR'='RAUMID')) %>%
  mutate(kids = EinwRatio*kids) %>%
  mutate(kids = Strukturquote*kids) %>% # Strukturquote
  select(BEZ, PLR, BLK, Einw, kids) %>%
  as.data.frame()
row.names(kids_in_blks) = kids_in_blks$BLK

data = tidy(blk[blk$BEZ == bez_id,], region='BLK') %>% inner_join(kids_in_blks, by=c('id'='BLK')) %>% mutate(kids=ifelse(kids==0, NA, kids), Einw=ifelse(Einw==0, NA, Einw))

ggmap(map) + geom_polygon(aes(x=long, y=lat, group=group, fill=kids), data=data) +
  coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01))
```

## Verfügbare Plätze

```{r}
relevant_kapas = kapas %>% select(Schulnummer, Kapa=`Plätze`) %>% filter(Schulnummer %in% relevant_schools) %>% as.data.frame()
#row.names(relevant_kapas) = relevant_kapas$Schulnummer
```

### Überprüfung der Summe der Kapazitäten, Anmeldungen und Kinderstatistiken

```{r}
'Summe Kapas'
relevant_kapas %>% .$Kapa %>% sum
'Anmeldungen'
kapas %>% mutate(Anmeldungen = as.numeric(gsub('[^0-9]', '', Anmeldungen))) %>% filter(Schulnummer %in% relevant_schools) %>% .$Anmeldungen %>% sum
'Kids laut Statistik'
kids_in_blks$kids %>% sum
relevant_ewr$kids %>% sum
```

## Schulwege von Blöcken zu Schulen aggregieren

Für jedes Wohngebäude suchen wir den zugehörigen Block

```{r}
residential_buildings_blocks = sampled_buildings %>% inner_join(df_blk) %>% filter(BEZ == bez_id)
residential_buildings_blocks
```

```{r}
routes_from_blks = residential_buildings_blocks %>%
  left_join(route_matrix %>% filter(dst %in% relevant_schools), by=c('OI'='src'))
head(routes_from_blks)
```

### Plot der relevanten Blöcke (mit Wohngebäuden)

```{r}
data = tidy(blk[blk$BEZ == bez_id,], region='BLK') %>% inner_join(routes_from_blks %>% group_by(BLK) %>% summarise(n=n()), by=c('id'='BLK'))
ggmap(map) + geom_polygon(aes(x=long, y=lat, group=group), fill='red', data=data) +
  #geom_point(aes(x=lon, y=lat), data=rb_df, color='black', size=0.01) +
  coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01))
```


```{r}
travel_from_blks = routes_from_blks %>% as.data.frame() %>% group_by(BLK, dst) %>% summarise(min=min(distance), avg=mean(distance), med=median(distance), max=max(distance)) %>% ungroup
travel_from_blks
```

### Plot der Blöcke mit Färbung nach durchschnittlichem Weg zur nächsten Schule

```{r}
data = tidy(blk[blk$BEZ == bez_id,], region='BLK') %>% left_join(travel_from_blks %>% group_by(BLK) %>% top_n(1, -avg), by=c('id'='BLK'))
ggmap(map) + geom_polygon(aes(x=long, y=lat, group=group, fill=-avg), data=data) +
  geom_point(aes(lon, lat), color='red', data = re_schulstand_df %>% filter(BEZIRK==bezirk & SCHULART=='Grundschule')) +
  coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01))
```


```{r}
travel_matrix = travel_from_blks %>% select(BLK, dst, avg) %>% spread(dst, avg)
dim(travel_matrix)
travel_matrix
```

## Sozioökonomische Daten

Wir haben Sozioökonomische Daten in den Wahlbezirken. Strategie:
- Schneiden der Wahlbezirke mit den Blöcken
- Übernahme des Prozentwertes vom Wahlbezirk für jeden (Unter-)block
- Zuordnung zu jedem Block und Vereinigung durch Flächen/Wohnhaus-gewichtetes Mittel des Prozentwertes 

```{r}
UWB = readOGR('download/RBS_OD_UWB_AGH2016.geojson', layer = 'OGRGeoJSON', stringsAsFactors = FALSE)
UWB$ID = paste0(UWB$BEZ, UWB$UWB)
sozio_UWB = read_excel('download/DL_BE_AH2016_Strukturdaten.xlsx', sheet = 3) %>% select(ID, sgbIIu65=`Einwohner unter 65 in SGB II 2014 Prozent`)
UWB = UWB %>% sp::merge(sozio_UWB, by='ID')

ggplot(broom::tidy(UWB, region='ID') %>% inner_join(UWB %>% as.data.frame, by=c('id'='ID'))) + geom_polygon(aes(x=long, y=lat, group=group, fill=sgbIIu65)) + coord_map()
```

Check for self intersections
```{r}
wgs84 = CRS(proj4string(blk))
ea_projection = CRS("+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs")
blk_in_bez = blk[blk$BEZ == bez_id,]
gIsValid(blk_in_bez)
blk_in_bez = gBuffer(spTransform(blk_in_bez, ea_projection), byid=T, width = -0.1)
any(xor(gIntersects(blk_in_bez, byid=T, returnDense = T), diag(1, length(blk_in_bez)) == 1))
plot(blk_in_bez)
#gIntersects(UWB[UWB$BEZ == '07',], byid=TRUE)
```

```{r}
uwb_in_bez = gBuffer(spTransform(UWB[UWB$BEZ == '07',], ea_projection), byid=T, width=-0.1)
gIsValid(uwb_in_bez)
any(xor(gIntersects(uwb_in_bez, byid=T, returnDense = T), diag(1, length(uwb_in_bez)) == 1))
plot(uwb_in_bez)
```

```{r}
intersection = gIntersection(uwb_in_bez, blk_in_bez, byid = T, drop_lower_td = T)

gIsValid(intersection)
plot(intersection)
```

```{r}
intersection_uwb_data = intersection %>% over(uwb_in_bez)
intersection_blk_data = intersection %>% over(blk_in_bez)
intersection_area = gArea(intersection, byid=T)
intersection_data = cbind(
    intersection_uwb_data %>% select(UWB_ID=ID, sgbIIu65),
    intersection_blk_data %>% select(BLK, BLK_Einw=Einw),
    data.frame(area=intersection_area) # FIXME how else to normalize?
    )
```

```{r}
length(intersection)
nrow(blk_in_bez)
# did we miss any blocks?
setdiff(blk_in_bez$BLK, intersection_data$BLK)
```

Pro Block mische die SGB-Werte der Unterblöcke gewichtet nach Fläche.
FIXME - besser nach Anzahl der Wohnhäuser?
```{r}
sgbII_blk = intersection_data %>%
  group_by(BLK) %>%
  summarise(sgbIIu65=sum(sgbIIu65*area)/sum(area)/100)
```


```{r}
ggplot(broom::tidy(spTransform(blk_in_bez, wgs84), region='BLK') %>% left_join(sgbII_blk, by=c('id'='BLK'))) + geom_polygon(aes(x=long, y=lat, group=group, fill=sgbIIu65)) + coord_map()
```

## Adjazenz von Blöcken

```{r}
row.names(blk_in_bez) = blk_in_bez$BLK
buffeded_blk_in_bez = gBuffer(blk_in_bez, byid=T, width=40)
adjacency = gIntersects(gBuffer(blk_in_bez, byid=T, width=40), byid = T)
rownames(adjacency) = blk_in_bez$BLK
colnames(adjacency) = blk_in_bez$BLK

adjacency_df = adjacency %>% as.data.frame() %>% mutate(from=rownames(.)) %>%
  gather(to, connected, -from) %>% filter(connected) %>%
  inner_join(spTransform(blk_in_bez, wgs84) %>% coordinates() %>% as.data.frame() %>% rename(from_long=V1, from_lat=V2) %>% mutate(from=rownames(.))) %>%
  inner_join(spTransform(blk_in_bez, wgs84) %>% coordinates() %>% as.data.frame() %>% rename(to_long=V1, to_lat=V2) %>% mutate(to=rownames(.)))

ggplot() +
  geom_polygon(aes(x=long, y=lat, group=group), fill='gray', data=broom::tidy(spTransform(blk_in_bez, wgs84), region='BLK')) + 
  geom_segment(aes(x=from_long, y=from_lat, xend=to_long, yend=to_lat), size=0.1, color='black', data=adjacency_df) +
  theme_nothing() + coord_map()

ggsave('figs/adjacency.pdf')
adjacency_df %>% filter(connected & from != to) %>% select(from, to) %>% write_csv('app/data/adjacency.csv')
```


## Relevante Daten auswählen

```{r}
optim_kapas = relevant_kapas
optim_kids_in_blks = kids_in_blks %>% filter(kids > 0) %>% inner_join(travel_matrix, by='BLK') %>% select(BLK, kids) %>% mutate(kids=kids)
nrow(optim_kids_in_blks)
nrow(optim_kapas)

select_schools = as.character(optim_kapas$Schulnummer)
select_blks = as.character(optim_kids_in_blks$BLK)

optim_matrix = inner_join(optim_kids_in_blks, travel_matrix, by='BLK')[select_schools]

dim(optim_matrix)

optim_kapas$Kapa %>% sum
optim_kids_in_blks$kids %>% sum
```

## Naive (initiale) Zuordnung: Jeder Block zur nächsten Schule

```{r}
solution = optim_matrix %>% mutate(BLK=optim_kids_in_blks$BLK) %>% gather(school, dist, -BLK) %>% group_by(BLK) %>% top_n(1, -dist) %>% ungroup

optim_matrix %>% t %>% as.data.frame %>% summarise_each(funs(min)) %>% sum()

solines = re_schulstand_df %>% inner_join(solution, by=c('spatial_name'='school')) %>% inner_join(cbind(as.data.frame(coordinates(blk[blk$BEZ == bez_id,])), blk[blk$BEZ == bez_id,]@data['BLK']))

data = tidy(blk[blk$BEZ == bez_id,], region='BLK') %>% left_join(solution, by=c('id'='BLK'))
ggmap(map, darken = c(0.5, 'white')) + geom_polygon(aes(x=long, y=lat, group=group, fill=school), data=data) +
  geom_segment(aes(x=V1,y=V2,xend=lon,yend=lat), data=solines, size=0.3) +
  geom_point(aes(lon, lat), color='black', size=2, data = re_schulstand_df %>% inner_join(solution, by=c('spatial_name'='school'))) +
  geom_point(aes(lon, lat, color=spatial_name), data = re_schulstand_df %>% inner_join(solution, by=c('spatial_name'='school'))) +
  coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01)) +
  guides(color=F, fill=F)
```

## Darstellung der Zuordnung als Tabelle

```{r}
library(formattable)
```

```{r}
solution %>% inner_join(optim_kids_in_blks, by='BLK') %>% inner_join(travel_from_blks, by=c('BLK'='BLK', 'school'='dst')) %>%
  group_by(school) %>% summarise(
    kids=sum(kids),
    num_blocks=n(),
    min_dist=min(min),
    avg_dist=mean((kids*avg)/sum(kids)),
    max_dist=max(max)
  ) %>%
  inner_join(relevant_kapas, by=c('school'='Schulnummer')) %>%
  mutate(
    utilization=kids/Kapa
  ) %>% select(
   Schule=school,
   `Blöcke`=num_blocks,
   Kapazität=Kapa,
   Kinder=kids,
   Auslastung=utilization,
   `Weg (min)`=min_dist,
   `Weg (Ø)`=avg_dist,
   `Weg (max)`=max_dist
  ) %>%
  formattable(
    list(
      Kinder = formatter("span", x ~ digits(x, 2)),
      Auslastung = formatter("span",
        style = x ~ style(color = ifelse(x < 1, "green", "red")),
        x ~ icontext(ifelse(x < 1, "ok", "remove"), percent(x))
      ),
      `Weg (Ø)` = proportion_bar("lightblue"),
      `Weg (min)` = proportion_bar("lightblue"),
      `Weg (max)` = proportion_bar("lightblue")
    )
  )
```

## ndH und LMB-Daten (nicht offen)

```{r}
ndh_lmb = read_excel('download/LMB_ndH2015.xlsx') %>% select(
  entity_id=BSN,
  ndH=`Anteil ndH`,
  LMB=`Anteil LMB`
)
ndh_lmb
```


## Daten für die App speichern

- entities.geojson
- entities.csv
- units.geojson
- units.csv
- weights.csv
- assignment.csv

### Neue Daten


#### Entities / Schulen

```{r}
entities = subset(re_schulstand_df, spatial_name %in% relevant_schools) %>%
  rename(entity_id = spatial_name) %>%
  select(-gml_id, -spatial_alias, -spatial_type) %>%
  inner_join(rename(relevant_kapas, capacity=Kapa), by=c('entity_id'='Schulnummer')) %>%
  left_join(ndh_lmb, by=c('entity_id'))
coordinates(entities) = ~ lon + lat
proj4string(entities) = CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
file.remove('app/data/entities.geojson')
entities %>% writeOGR('app/data/entities.geojson', layer="entities", driver="GeoJSON", check_exists=F)
entities@data %>% select(entity_id, capacity) %>% write_csv('app/data/entities.csv')
```

#### Units / Statistische Blöcke

```{r}
units = subset(blk, BEZ == bez_id)

units@data$PLR = NULL
units@data$Einw = NULL
units@data$BEZ = NULL
units@data$unit_id = units@data$BLK
units@data$BLK = NULL

units = units %>% sp::merge(
  optim_kids_in_blks %>%
    left_join(sgbII_blk) %>%
    select(unit_id=BLK, population=kids, sgbIIu65)
  )

file.remove('app/data/units.geojson')
units %>% writeOGR('app/data/units.geojson', layer="entities", driver="GeoJSON", check_exists=F)
units@data %>% write_csv('app/data/units.csv')
```

#### Zuordnung

```{r}
assignment = solution %>% select(unit_id=BLK, entity_id=school)
assignment %>% write_csv('app/data/assignment.csv')
```

#### Weights / Schulwege

```{r}
travel_from_blks %>%
  rename(unit_id=BLK, entity_id=dst) %>%
  #gather(weight, value, -unit_id, -entity_id) %>%
  write_csv('app/data/weights.csv')
```

#### Zusätzliche Daten

```{r}
file.copy('download/RBS_OD_BEZ_2015_12.geojson', 'app/data/RBS_OD_BEZ_2015_12.geojson')
```

